In [1]:
from __future__ import print_function

import numpy as np
import tensorflow as tf

XOR Network

Data generation


In [2]:
def create_examples(N, batch_size):
    # A and B are random binary matrices of shape (batch_size, N)
    A = np.random.binomial(n=1, p=0.5, size=(batch_size, N))
    B = np.random.binomial(n=1, p=0.5, size=(batch_size, N))

    # X concatenates A and B along the feature axis
    X = np.zeros((batch_size, 2 * N), dtype=np.float32)
    X[:, :N], X[:, N:] = A, B

    # Y is the elementwise XOR of A and B
    Y = (A ^ B).astype(np.float32)
    return X, Y

In [3]:
X, Y = create_examples(3, 2)
print(X[0, :3], "xor", X[0, 3:], "equals", Y[0])
print(X[1, :3], "xor", X[1, 3:], "equals", Y[1])


[ 0.  1.  0.] xor [ 1.  1.  1.] equals [ 1.  0.  1.]
[ 0.  0.  1.] xor [ 1.  1.  0.] equals [ 1.  1.  1.]
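
As a quick check (not from the original notebook), the layout of X can be verified directly with numpy: the first N columns of X hold A, the last N hold B, and Y is their elementwise XOR.

A_bits = X[:, :3].astype(np.int32)
B_bits = X[:, 3:].astype(np.int32)
print(np.array_equal(A_bits ^ B_bits, Y.astype(np.int32)))  # prints True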

XOR cannot be solved with a single-layer neural network
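
This is the classic linear-separability argument: thresholding a sigmoid of an affine map at $0.5$ is the same as thresholding the affine map at $0$, so a single layer can only realize linearly separable functions, and XOR is not linearly separable. A minimal brute-force sketch of the 1-bit case (plain numpy, not part of the original notebook), scanning a coarse grid of weights and biases for a single linear threshold unit:

import itertools
import numpy as np

inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
targets = np.array([0, 1, 1, 0])  # XOR truth table

grid = np.linspace(-2.0, 2.0, 41)
solvable = any(
    np.array_equal((np.dot(inputs, [w1, w2]) + b > 0).astype(int), targets)
    for w1, w2, b in itertools.product(grid, repeat=3)
)
print(solvable)  # False: no linear threshold unit reproduces XOR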


In [22]:
import math

class Layer(object):
    def __init__(self, input_size, output_size):
        # biases start at zero
        tensor_b = tf.zeros((output_size,))
        self.b = tf.Variable(tensor_b)
        # weights start as uniform noise in [-1/sqrt(n), 1/sqrt(n)],
        # a standard scaling that keeps initial activations well-behaved
        tensor_W = tf.random_uniform((input_size, output_size),
                                     -1.0 / math.sqrt(input_size),
                                     1.0 / math.sqrt(input_size))
        self.W = tf.Variable(tensor_W)

    def __call__(self, x):
        # affine map: matmul(x, W) + b
        return tf.matmul(x, self.W) + self.b

In [105]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

In [106]:
N = 5
# x represents the input data
x = tf.placeholder(tf.float32, (None, 2 * N), name="x")
# y_golden holds the reference (target) outputs
y_golden = tf.placeholder(tf.float32, (None, N), name="y")

layer1 = Layer(2 * N, N)
# y is a linear projection of x with a sigmoid nonlinearity applied to the result
y = tf.nn.sigmoid(layer1(x))

# mean squared error over all examples and all N output dimensions.
cost = tf.reduce_mean(tf.square(y - y_golden))

# create a function that will optimize the neural network
optimizer = tf.train.AdagradOptimizer(learning_rate=0.3)
train_op = optimizer.minimize(cost)

# initialize the variables
sess.run(tf.initialize_all_variables())

In [107]:
for t in range(5000):
    example_x, example_y = create_examples(N, 10)
    cost_t, _ = sess.run([cost, train_op], {x: example_x, y_golden: example_y})
    if t % 500 == 0:
        print(cost_t)  # cost is already a scalar (mean over the batch)


0.262958
0.249229
0.259427
0.245061
0.252946
0.24782
0.250937
0.246418
0.246755
0.244774

Notice that the error stays far from zero.

In fact, the network always predicts approximately $0.5$, regardless of the input. Since the targets are all $0$ or $1$, that yields a mean squared error of about $0.25$, because we use mean squared error ($0.5^2 = 0.25$).
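
A quick numeric sanity check (plain numpy, not part of the original run): against binary targets, a constant prediction of $0.5$ is off by exactly $0.5$ on every bit, so the MSE is exactly $0.25$ no matter what the targets are.

targets = np.random.binomial(n=1, p=0.5, size=(1000, 5)).astype(np.float32)
constant_prediction = np.full_like(targets, 0.5)
print(np.mean((constant_prediction - targets) ** 2))  # prints 0.25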


In [109]:
X, _ = create_examples(N, 3)
prediction = sess.run([y], {x: X})
print(X)
print(prediction)


[[ 1.  0.  1.  1.  1.  1.  0.  0.  1.  1.]
 [ 1.  0.  1.  1.  0.  1.  1.  1.  1.  1.]
 [ 0.  0.  1.  0.  1.  0.  0.  1.  1.  1.]]
[array([[ 0.56099683,  0.54470569,  0.4940519 ,  0.49518651,  0.54470527],
       [ 0.56658453,  0.52068532,  0.48442408,  0.4748241 ,  0.5073036 ],
       [ 0.53004831,  0.52866411,  0.48705727,  0.48926324,  0.53761232]], dtype=float32)]

The accuracy is not hard to predict either: with every output stuck near $0.5$ and a $0.5$ threshold, each bit is essentially a coin flip, so we should land close to 50%.


In [113]:
N_EXAMPLES = 1000
example_x, example_y = create_examples(N, N_EXAMPLES)
# one day I need to write a wrapper which will turn the expression
# below into:
#     tf.abs(y - y_golden) < 0.5
is_correct = tf.less_equal(tf.abs(y - y_golden), tf.constant(0.5))
accuracy = tf.reduce_mean(tf.cast(is_correct, "float"))

acc_result = sess.run(accuracy, {x: example_x, y_golden: example_y})
print("Accuracy over %d examples: %.0f %%" % (N_EXAMPLES, 100.0 * acc_result))


Accuracy over 1000 examples: 48 %

XOR Network with 2 layers


In [149]:
tf.reset_default_graph()
sess = tf.InteractiveSession()


In [150]:
N = 5
# we add a single hidden layer of size 12;
# otherwise the code is the same as above
HIDDEN_SIZE = 12

x = tf.placeholder(tf.float32, (None, 2 * N), name="x")
y_golden = tf.placeholder(tf.float32, (None, N), name="y")

layer1 = Layer(2 * N, HIDDEN_SIZE)
layer2 = Layer(HIDDEN_SIZE, N) # <------- HERE IT IS!

hidden_repr = tf.nn.tanh(layer1(x))
y = tf.nn.sigmoid(layer2(hidden_repr))

cost = tf.reduce_mean(tf.square(y - y_golden))

optimizer = tf.train.AdagradOptimizer(learning_rate=0.3)
train_op = optimizer.minimize(cost)
sess.run(tf.initialize_all_variables())

In [151]:
for t in range(5000):
    example_x, example_y = create_examples(N, 10)
    cost_t, _ = sess.run([cost, train_op], {x: example_x, y_golden: example_y})
    if t % 500 == 0:
        print(cost_t)  # cost is already a scalar (mean over the batch)


0.241089
0.240045
0.1631
0.0709099
0.0326128
0.0087687
0.00526247
0.00518266
0.00272845
0.00213744

This time the network works a tad better: the cost drops by two orders of magnitude.


In [156]:
X, Y = create_examples(N, 3)
prediction = sess.run([y], {x: X})
print(X)
print(Y)
print(prediction)


[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  1.  1.  1.  0.  1.  1.  1.]
 [ 0.  1.  1.  1.  0.  0.  0.  0.  1.  0.]]
[[ 0.  0.  0.  0.  1.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  1.  1.  0.  0.]]
[array([[ 0.10384335,  0.04389301,  0.05774897,  0.04509954,  0.9374879 ],
       [ 0.05130127,  0.02655722,  0.97246277,  0.03545236,  0.04168396],
       [ 0.03924223,  0.96327722,  0.96935028,  0.03265698,  0.0310236 ]], dtype=float32)]

In [152]:
N_EXAMPLES = 1000
example_x, example_y = create_examples(N, N_EXAMPLES)
is_correct = tf.less_equal(tf.abs(y - y_golden), tf.constant(0.5))
accuracy = tf.reduce_mean(tf.cast(is_correct, "float"))

acc_result = sess.run(accuracy, {x: example_x, y_golden: example_y})
print("Accuracy over %d examples: %.0f %%" % (N_EXAMPLES, 100.0 * acc_result))


Accuracy over 1000 examples: 100 %
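
As a possible follow-up (a sketch, not in the original notebook), the metric above scores each bit independently; one could also count an example as correct only when all $N$ bits match, reusing the is_correct tensor and the session defined above:

# hypothetical stricter metric: fraction of examples with every bit correct
all_bits_correct = tf.reduce_min(tf.cast(is_correct, "float"), reduction_indices=[1])
exact_match_accuracy = tf.reduce_mean(all_bits_correct)
print(sess.run(exact_match_accuracy, {x: example_x, y_golden: example_y}))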